import scrapy
from scrapy.spiders import CrawlSpider, Request, Rule
import string

from spider_demo.items import SpiderDemoItem
from scrapy.linkextractors import LinkExtractor


class ExampleSpider(scrapy.Spider):
    """Spider that extracts id, name and grade of movies from movie.douban.com.

    The spider requests every URL in ``start_urls`` and hands each response
    to :meth:`parse`, which yields one ``SpiderDemoItem`` per matched node.
    """

    name = 'example'
    allowed_domains = ['movie.douban.com']
    # Scrapy expects a list of URLs; a bare string would be iterated
    # character by character by code that loops over ``start_urls``.
    start_urls = ['https://movie.douban.com/']

    def start_requests(self):
        """Yield one ``Request`` for every configured start URL.

        A single string assigned to ``start_urls`` is still accepted and is
        treated as a one-element list.
        """
        urls = self.start_urls
        if isinstance(urls, str):
            urls = [urls]
        for url in urls:
            yield Request(url)

    def parse(self, response):
        """Yield a ``SpiderDemoItem`` for every node matched on the page.

        Args:
            response: The downloaded page to extract movies from.

        Yields:
            SpiderDemoItem: One item per matched node with the ``id``,
            ``name`` and ``grade`` fields filled in. A field is ``None``
            when its XPath matches nothing inside that node.
        """
        movie_xpath = '//*[@id="content"]/div/div[1]/div/div[4]/div'
        self.logger.debug("response=%s", response)

        # Keep the SelectorList (no ``.get()``) so every matched node can be
        # queried individually; ``.get()`` would collapse it to one string.
        movies = response.xpath(movie_xpath)
        self.logger.debug("matched %d node(s)", len(movies))

        for movie in movies:
            # Paths start with ``./`` so they are evaluated relative to the
            # current node instead of re-querying the whole document.
            # NOTE(review): ``@data-id`` / ``@alt`` assume the values live in
            # attributes of those elements -- confirm against the live page.
            movie_id = movie.xpath('./a[1]/div/@data-id').get()
            title = movie.xpath('./a[1]/div/img/@alt').get()
            grade = movie.xpath('./a[1]/p/strong/text()').get()
            self.logger.debug(
                "id=%s name=%s grade=%s", movie_id, title, grade
            )

            # A fresh item per movie, so yielded items never share state.
            item = SpiderDemoItem()
            item['id'] = movie_id
            item['name'] = title
            item['grade'] = grade
            yield item

    # regex_alt = '//ul[@class=row]/li/span/a/span/img/text()'
    # alt = response.xpath(regex_alt).get()
    # item['name'] = alt
    # item['id'] = file
    # item['grade'] = 'bizhi'
    # return item


if __name__ == "__main__":
    # Debug helper: when the module is run directly, print every tag name
    # on its own line.
    tags = (
        '热门', '最新', '经典', '可播放',
        '豆瓣高分', '冷门佳片', '欧美', '韩国',
        '日本', '动作', '喜剧', '爱情',
        '科幻', '悬疑', '恐怖', '成长',
    )
    print("\n".join(tags))
